library(ggplot2)
metadata = read.csv("data/MGRB_paper_sample_metadata_20180713.csv", stringsAsFactors = FALSE)
metadata$cohort[metadata$cohort == "SweGen_NSPHS"] = "SweGen"
metadata$cohort[metadata$cohort == "SweGen_STR"] = "SweGen"
metadata = metadata[
(metadata$cohort %in% c("ASPREE", "45andUp") &
metadata$Phase2.SampleQCTier %in% c(1, 2) &
metadata$successfullySequenced == 1 &
metadata$Phase2.RelatednessDropped == 0) |
(metadata$cohort %in% c("Cairns")) &
!apply(is.na(metadata[,c("isFemale", "AgeAtCollectionYears")]), 1, any),]
metadata$Frailty_GaitSpeedMs = 3/metadata$Frailty_GaitSpeed
metadata$sex = factor(c("male", "female")[metadata$isFemale + 1])
ggplot(metadata, aes(x = xCNLowDepth, y = yCNLowDepth, col = sex, pch = cohort)) + geom_point()
ggplot(metadata, aes(x = xCNLowDepth, y = yCNLowDepth, col = sex)) + geom_point() + facet_wrap(~ cohort)
One probable XXX. There are a number of possible XYs present also, but they have been removed by the successfullySequenced == 1 and !is.na(metadata$isFemale) filters. Some ASRB have lower X CN than the other cohorts, and ASRB in general has higher Y CN than the others; probable technical bias. This won’t affect the subsequent frailty comparisons as these are internal to ASPREE only.
ggplot(metadata, aes(x = AgeAtCollectionYears, y = TelSeqFinalRecalibrated, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = SomaSNV_S3_LD, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 178 rows containing non-finite values (stat_smooth).
## Warning: Removed 178 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = mtCNLowDepth, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
ggplot(metadata, aes(x = AgeAtCollectionYears, y = xCNLowDepth, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
ggplot(metadata, aes(x = AgeAtCollectionYears, y = yCNLowDepth, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
ggplot(metadata, aes(x = AgeAtCollectionYears, y = mity_LowDepth_NumVars, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
## Warning: Removed 204 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = SomaCNV_ClonalFraction, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 1177 rows containing non-finite values (stat_smooth).
## Warning: Removed 1177 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = CH_any_10pct, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "gam", formula = y ~ x, method.args = list(family = "binomial")) + theme_bw()
## Warning: Removed 1177 rows containing non-finite values (stat_smooth).
## Warning: Removed 1177 rows containing missing values (geom_point).
ggplot(metadata[metadata$CH_any_10pct == 1,], aes(x = AgeAtCollectionYears, y = SomaCNV_ClonalFraction, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 1177 rows containing non-finite values (stat_smooth).
## Warning: Removed 1177 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = Frailty_GripStrength, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess") + theme_bw()
## Warning: Removed 1494 rows containing non-finite values (stat_smooth).
## Warning: Removed 1494 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = Frailty_GaitSpeedMs, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess") + theme_bw()
## Warning: Removed 1472 rows containing non-finite values (stat_smooth).
## Warning: Removed 1472 rows containing missing values (geom_point).
library(corrplot)
## corrplot 0.84 loaded
corrplot(cor(metadata[metadata$sex == "male", c("TelSeqFinalRecalibrated", "SomaSNV_S3_LD", "mtCNLowDepth", "yCNLowDepth", "mity_LowDepth_NumVars", "Frailty_GripStrength", "Frailty_GaitSpeedMs")], method = "kendall", use = "pairwise.complete.obs"), method = "square", main = "Males")
corrplot(cor(metadata[metadata$sex == "female", c("TelSeqFinalRecalibrated", "SomaSNV_S3_LD", "mtCNLowDepth", "mity_LowDepth_NumVars", "Frailty_GripStrength", "Frailty_GaitSpeedMs")], method = "kendall", use = "pairwise.complete.obs"), method = "square", main = "Females")
Some correlation, but no strong colinearity.
Use some simple GAM fits to aid in plotting, factoring out the cohort intercepts. We add a negligible amount of noise (+/- 6 months) to the age variable to prevent false positives with gam.check.
library(mgcv)
set.seed(314159)
metadata$AgeAtCollectionYears.jittered = metadata$AgeAtCollectionYears - 0.5 + runif(nrow(metadata))
plotfit.telseq = gam(TelSeqFinalRecalibrated ~ s(AgeAtCollectionYears.jittered, by = sex) + cohort, data = metadata)
gam.check(plotfit.telseq)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 5 iterations.
## The RMS GCV score gradient at convergence was 1.013275e-07 .
## The Hessian was positive definite.
## Model rank = 21 / 21
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(AgeAtCollectionYears.jittered):sexfemale 9.0 2.8 1.01 0.82
## s(AgeAtCollectionYears.jittered):sexmale 9.0 3.3 1.01 0.70
plotfit.soma_snv = gam(log(SomaSNV_S3_LD + 0.2) ~ s(AgeAtCollectionYears.jittered, by = sex) + cohort, data = metadata)
gam.check(plotfit.soma_snv)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 5 iterations.
## The RMS GCV score gradient at convergence was 8.627284e-07 .
## The Hessian was positive definite.
## Model rank = 21 / 21
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(AgeAtCollectionYears.jittered):sexfemale 9.00 2.46 1.01 0.64
## s(AgeAtCollectionYears.jittered):sexmale 9.00 3.29 1.01 0.69
plotfit.mtcn = gam(log(mtCNLowDepth) ~ s(AgeAtCollectionYears.jittered, by = sex) + cohort, data = metadata)
gam.check(plotfit.mtcn)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 9 iterations.
## The RMS GCV score gradient at convergence was 1.880056e-06 .
## The Hessian was positive definite.
## Model rank = 21 / 21
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(AgeAtCollectionYears.jittered):sexfemale 9.00 8.49 1 0.34
## s(AgeAtCollectionYears.jittered):sexmale 9.00 4.55 1 0.42
plotfit.ycn = gam(yCNLowDepth ~ s(AgeAtCollectionYears.jittered) + cohort, data = metadata[metadata$sex == "male",])
gam.check(plotfit.ycn)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 4 iterations.
## The RMS GCV score gradient at convergence was 6.975827e-08 .
## The Hessian was positive definite.
## Model rank = 12 / 12
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(AgeAtCollectionYears.jittered) 9.00 3.77 1.01 0.69
# Odd residual distribution -- not linked to cohort
plotfit.mitynvar = gam(log(mity_LowDepth_NumVars+1) ~ s(AgeAtCollectionYears.jittered, by = sex) + cohort, data = metadata)
gam.check(plotfit.mitynvar)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 9 iterations.
## The RMS GCV score gradient at convergence was 1.159208e-06 .
## The Hessian was positive definite.
## Model rank = 21 / 21
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(AgeAtCollectionYears.jittered):sexfemale 9.00 7.36 1 0.56
## s(AgeAtCollectionYears.jittered):sexmale 9.00 5.69 1 0.65
Now construct the cohort intercept-corrected data for plotting, using the ASPREE intercepts as a reference:
temp = metadata
temp$cohort = "ASPREE"
metadata$plotfit.telseq = NA
metadata$plotfit.soma_snv = NA
metadata$plotfit.mtcn = NA
metadata$plotfit.ycn = NA
metadata$plotfit.mitynvar = NA
metadata$plotfit.telseq[!is.na(metadata$TelSeqFinalRecalibrated)] = predict(plotfit.telseq, newdata = temp[!is.na(metadata$TelSeqFinalRecalibrated),]) + residuals(plotfit.telseq)
metadata$plotfit.soma_snv[!is.na(metadata$SomaSNV_S3_LD)] = exp(predict(plotfit.soma_snv, newdata = temp[!is.na(metadata$SomaSNV_S3_LD),]) + residuals(plotfit.soma_snv) - 0.2)
metadata$plotfit.mtcn[!is.na(metadata$mtCNLowDepth)] = exp(predict(plotfit.mtcn, newdata = temp[!is.na(metadata$mtCNLowDepth),]) + residuals(plotfit.mtcn))
metadata$plotfit.ycn[!is.na(metadata$yCNLowDepth) & metadata$sex == "male"] = predict(plotfit.ycn, newdata = temp[!is.na(metadata$yCNLowDepth) & metadata$sex == "male",]) + residuals(plotfit.ycn)
metadata$plotfit.mitynvar[!is.na(metadata$mity_LowDepth_NumVars)] = exp(predict(plotfit.mitynvar, newdata = temp[!is.na(metadata$mity_LowDepth_NumVars),]) + residuals(plotfit.mitynvar) - 1)
Final cohort intercept-corrected plots. Note clipping in some cases to focus on functional form; only a minor fraction of points are lost.
ggplot(metadata, aes(x = AgeAtCollectionYears, y = plotfit.telseq, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + coord_cartesian(ylim = c(1, 5))
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = plotfit.soma_snv, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + scale_y_continuous(trans = 'log10') + coord_cartesian(ylim = c(0.1, 30))
## Warning: Removed 178 rows containing non-finite values (stat_smooth).
## Warning: Removed 178 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = plotfit.mtcn, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + coord_cartesian(ylim = c(0, 400))
ggplot(metadata[metadata$sex == "male",], aes(x = AgeAtCollectionYears, y = plotfit.ycn, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + coord_cartesian(ylim = c(0, 1))
ggplot(metadata, aes(x = AgeAtCollectionYears, y = plotfit.mitynvar, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + coord_cartesian(ylim = c(0, 10))
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
## Warning: Removed 204 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = Frailty_GripStrength, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw()
## Warning: Removed 1494 rows containing non-finite values (stat_smooth).
## Warning: Removed 1494 rows containing missing values (geom_point).
ggplot(metadata, aes(x = AgeAtCollectionYears, y = Frailty_GaitSpeedMs, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw()
## Warning: Removed 1472 rows containing non-finite values (stat_smooth).
## Warning: Removed 1472 rows containing missing values (geom_point).
Directly test the hypothesis: somatic measures are significantly associated with fraility measures after conditioning for conventional variables. Use flexible GAM models; this necessitates permutation testing. Because of the number of tests (5 somatic variables in males, 4 in females, 2 frailty measure = 18 tests), use a training/validation split to alleviate MT concerns.
library(plyr)
library(doParallel)
registerDoParallel(28)
test.pval_threshold = 0.2
gam_permutation_test = function(formula, data, perm_vars, seed = 314159, B = 10000, ...)
{
fit_deviance = deviance(gam(formula = formula, data = data, ...))
perm_deviance = laply(1:B, function(i)
{
perm_data = data
set.seed(seed + i)
permutation = sample.int(nrow(perm_data), replace = FALSE)
for (perm_var in perm_vars)
perm_data[,perm_var] = perm_data[permutation,perm_var]
deviance(gam(formula = formula, data = perm_data, ...))
}, .parallel = TRUE)
nextr = sum(perm_deviance <= fit_deviance)
list(deviance = fit_deviance, deviance_perm = perm_deviance, p.value = (nextr+0.5)/(B+1))
}
metadata.aspree.complete = metadata[metadata$cohort == "ASPREE", c("sampleID", "Frailty_GripStrength", "Frailty_GaitSpeedMs",
"AgeAtCollectionYears", "TelSeqFinalRecalibrated", "SomaSNV_S3_LD", "mtCNLowDepth", "yCNLowDepth", "mity_LowDepth_NumVars",
"sex", "HtMtrs", "WtKgs", "AbdoCircCms", "GlcmmolL")]
metadata.aspree.complete = metadata.aspree.complete[!apply(is.na(metadata.aspree.complete), 1, any),]
metadata.aspree.complete$BMI = metadata.aspree.complete$WtKgs / metadata.aspree.complete$HtMtrs^2
set.seed(314159)
metadata.aspree.complete$split = sample(rep(c("training", "validation"), c(round(0.25*nrow(metadata.aspree.complete)), nrow(metadata.aspree.complete) - round(0.25*nrow(metadata.aspree.complete)))))
set.seed(314159)
metadata.aspree.complete$AgeAtCollectionYears.jittered = metadata.aspree.complete$AgeAtCollectionYears + runif(nrow(metadata.aspree.complete)) - 0.5
metadata.aspree.complete$Frailty_GripStrength.trans = metadata.aspree.complete$Frailty_GripStrength^0.7
metadata.aspree.complete$SomaSNV_S3_LD.trans = log(metadata.aspree.complete$SomaSNV_S3_LD + 1e-1)
metadata.aspree.complete$mtCNLowDepth.trans = sqrt(metadata.aspree.complete$mtCNLowDepth)
test.grip.telseq.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(TelSeqFinalRecalibrated), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("TelSeqFinalRecalibrated"))
test.grip.somas3.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(SomaSNV_S3_LD.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("SomaSNV_S3_LD.trans"))
test.grip.ycn.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(yCNLowDepth), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("yCNLowDepth"))
test.grip.mtcn.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("mtCNLowDepth.trans"))
test.grip.mitynvars.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mity_LowDepth_NumVars), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("mity_LowDepth_NumVars"))
test.grip.telseq.female = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(TelSeqFinalRecalibrated), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("TelSeqFinalRecalibrated"))
test.grip.somas3.female = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(SomaSNV_S3_LD.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("SomaSNV_S3_LD.trans"))
test.grip.mtcn.female = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("mtCNLowDepth.trans"))
test.grip.mitynvars.female = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mity_LowDepth_NumVars), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("mity_LowDepth_NumVars"))
test.gait.telseq.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(TelSeqFinalRecalibrated), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("TelSeqFinalRecalibrated"))
test.gait.somas3.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(SomaSNV_S3_LD.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("SomaSNV_S3_LD.trans"))
test.gait.ycn.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(yCNLowDepth), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("yCNLowDepth"))
test.gait.mtcn.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("mtCNLowDepth.trans"))
test.gait.mitynvars.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mity_LowDepth_NumVars), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("mity_LowDepth_NumVars"))
test.gait.telseq.female = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(TelSeqFinalRecalibrated), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("TelSeqFinalRecalibrated"))
test.gait.somas3.female = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(SomaSNV_S3_LD.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("SomaSNV_S3_LD.trans"))
test.gait.mtcn.female = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("mtCNLowDepth.trans"))
test.gait.mitynvars.female = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mity_LowDepth_NumVars), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("mity_LowDepth_NumVars"))
test.pvals = c(
"grip.telseq.male" = test.grip.telseq.male$p.value,
"grip.somas3.male" = test.grip.somas3.male$p.value,
"grip.ycn.male" = test.grip.ycn.male$p.value,
"grip.mtcn.male" = test.grip.mtcn.male$p.value,
"grip.mityn.male" = test.grip.mitynvars.male$p.value,
"grip.telseq.female" = test.grip.telseq.female$p.value,
"grip.somas3.female" = test.grip.somas3.female$p.value,
"grip.mtcn.female" = test.grip.mtcn.female$p.value,
"grip.mityn.female" = test.grip.mitynvars.female$p.value,
"gait.telseq.male" = test.gait.telseq.male$p.value,
"gait.somas3.male" = test.gait.somas3.male$p.value,
"gait.ycn.male" = test.gait.ycn.male$p.value,
"gait.mtcn.male" = test.gait.mtcn.male$p.value,
"gait.mityn.male" = test.gait.mitynvars.male$p.value,
"gait.telseq.female" = test.gait.telseq.female$p.value,
"gait.somas3.female" = test.gait.somas3.female$p.value,
"gait.mtcn.female" = test.gait.mtcn.female$p.value,
"gait.mityn.female" = test.gait.mitynvars.female$p.value
)
sort(test.pvals)
## grip.mtcn.male grip.telseq.male gait.mtcn.female
## 0.05114489 0.29272073 0.34691531
## grip.mityn.female grip.mityn.male gait.ycn.male
## 0.45720428 0.47580242 0.50059994
## grip.somas3.male gait.mtcn.male gait.telseq.female
## 0.50499950 0.54399560 0.55599440
## gait.mityn.female gait.somas3.male grip.ycn.male
## 0.60348965 0.60478952 0.72277772
## grip.somas3.female grip.mtcn.female gait.somas3.female
## 0.73007699 0.73197680 0.76697330
## grip.telseq.female gait.mityn.male gait.telseq.male
## 0.90785921 0.94065593 0.95195480
which(test.pvals <= test.pval_threshold)
## grip.mtcn.male
## 4
test.grip.mtcn.male.validation = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "validation",], perm_vars = c("mtCNLowDepth.trans"))
test.grip.mtcn.male.validation$p.value
## [1] 0.03564644
Examine the influence of mtDNA CN on grip strength in men. Express this as biological years vs calendar years for an 80 calendar-year-old man (the median age of men in ASPREE).
illustr.target_somatic_quant = unique(sort(c(0.499, 0.501, seq(0.01, 0.99, 0.01))))
illustr.ci_levels = c(0.5, 0.8, 0.9, 0.95)
illustr.R = 100000
illustr.target_age = 80
# Calculate the local quantile of each point in x. The local quantile
# of point i is defined as eCDF(x_i), where the eCDF is based on the
# x-values of all points within deltaz of z_i, not including point i
# itself: { x_j : z_i - deltaz <= z_j < z_i + deltaz, j != i }.
local_quantile = function(x, z, deltaz, group = rep(1, length(x)))
{
n = length(x)
as.data.frame(t(sapply(1:n, function(i) {
x_i = x[i]
z_i = z[i]
group_i = group[i]
neighbourhood_x = x[z >= z_i - deltaz & z < z_i + deltaz & 1:n != i & group == group_i]
neighbourhood_size = length(neighbourhood_x)
if (neighbourhood_size == 0)
lq = 0
else
lq = ecdf(neighbourhood_x)(x_i)
return(c(lq = lq, ls = neighbourhood_size))
})))
}
for (temp.var in c("Frailty_GripStrength", "Frailty_GaitSpeedMs", "SomaSNV_S3_LD", "mtCNLowDepth", "mity_LowDepth_NumVars", "yCNLowDepth", "TelSeqFinalRecalibrated"))
{
metadata.aspree.complete = cbind(metadata.aspree.complete, local_quantile(metadata.aspree.complete[,temp.var], metadata.aspree.complete$AgeAtCollectionYears, 1.1, metadata.aspree.complete$sex))
metadata.aspree.complete[metadata.aspree.complete[,ncol(metadata.aspree.complete)] < 20, ncol(metadata.aspree.complete)-1] = NA
colnames(metadata.aspree.complete)[ncol(metadata.aspree.complete)-1] = sprintf("%s.quant.lq", temp.var)
colnames(metadata.aspree.complete)[ncol(metadata.aspree.complete)] = sprintf("%s.quant.ls", temp.var)
}
For the target age of 80, the local quantile is based on 293 data points, being ASPREE men with age in [79, 81].
illustr.metadata.aspree.complete.male = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & !is.na(metadata.aspree.complete$mtCNLowDepth.quant.lq),]
range(illustr.metadata.aspree.complete.male$AgeAtCollectionYears)
## [1] 75 89
median(illustr.metadata.aspree.complete.male$AgeAtCollectionYears)
## [1] 80
sum(illustr.metadata.aspree.complete.male$AgeAtCollectionYears == median(illustr.metadata.aspree.complete.male$AgeAtCollectionYears))
## [1] 102
ggplot(illustr.metadata.aspree.complete.male, aes(x = AgeAtCollectionYears.jittered, y = Frailty_GaitSpeedMs)) + geom_point() + geom_smooth(method = "gam")
ggplot(illustr.metadata.aspree.complete.male, aes(x = AgeAtCollectionYears.jittered, y = Frailty_GripStrength.trans)) + geom_point() + geom_smooth(method = "gam")
ggplot(illustr.metadata.aspree.complete.male, aes(x = AgeAtCollectionYears.jittered, y = Frailty_GaitSpeedMs)) + geom_point() + geom_smooth(method = "gam") + coord_cartesian(ylim = c(.8, 1.2))
ggplot(illustr.metadata.aspree.complete.male, aes(x = AgeAtCollectionYears.jittered, y = Frailty_GripStrength.trans)) + geom_point() + geom_smooth(method = "gam") + coord_cartesian(ylim = c(8, 14))
It’s reasonable to model somatic ~ age as linear, thereby considerably simplifying the inversion performed below. I consider this fine for illustrative purposes, which is the goal here. At worst, the slight double-dipping will falsely bias the bootstrap variance downwards by a small amount.
library(boot)
somatic_effect_bootstrap_statistic = function(data, indices, target_age, target_somatic_quant)
{
data = data[indices,]
fit = gam(frailty ~ age + s(somatic_quant), data = data)
preds_frailty_age_mediansom = expand.grid(age = seq(min(data$age), max(data$age), 1), somatic_quant = 0.5)
preds_frailty_targetage_allsom = expand.grid(age = target_age, somatic_quant = target_somatic_quant)
preds_frailty_age_mediansom$frailty_somatic.pred = predict(fit, newdata = preds_frailty_age_mediansom, se.fit = FALSE)
preds_frailty_targetage_allsom$frailty_somatic.pred = predict(fit, newdata = preds_frailty_targetage_allsom, se.fit = FALSE)
frailty2age_mediansomatic = approxfun(x = preds_frailty_age_mediansom$frailty_somatic.pred, y = preds_frailty_age_mediansom$age)
preds_frailty_targetage_allsom$frailtyage_somatic.pred = frailty2age_mediansomatic(preds_frailty_targetage_allsom$frailty_somatic.pred)
preds_frailty_targetage_allsom$age_excess = preds_frailty_targetage_allsom$frailtyage_somatic.pred - preds_frailty_targetage_allsom$age
preds_frailty_targetage_allsom$age_excess
}
somatic_effect_bootstrap = function(age, frailty, somatic_quant, target_age, target_somatic_quant, seed = NA, ...)
{
if (!is.na(seed))
set.seed(seed)
boot(
data = data.frame(age = age, frailty = frailty, somatic_quant = somatic_quant),
statistic = function(data, indices) somatic_effect_bootstrap_statistic(data, indices, target_age, target_somatic_quant),
...)
}
somatic_effect_bootsummary = function(boot_result, mtcn_quantile, ci_levels)
{
bootsummary = expand.grid(mtcn_quantile = mtcn_quantile, ci_level = ci_levels)
bootsummary = ddply(bootsummary, colnames(bootsummary), function(d) {
ci = try(boot.ci(boot_result, conf = d$ci_level, index = which(mtcn_quantile == d$mtcn_quantile), type = "bca"))
d$boot.method = "BCa"
if (class(ci) == "try-error")
{
d$boot.lcl = NA
d$boot.ucl = NA
}
else
{
d$boot.lcl = ci$bca[4]
d$boot.ucl = ci$bca[5]
}
d}, .parallel = TRUE)
bootsummary$statistic = boot_result$t0[match(bootsummary$mtcn_quantile, mtcn_quantile)]
bootsummary
}
illustr.grip_mtcn_bootstrap = somatic_effect_bootstrap(
age = illustr.metadata.aspree.complete.male$AgeAtCollectionYears,
frailty = illustr.metadata.aspree.complete.male$Frailty_GripStrength.trans,
somatic_quant = illustr.metadata.aspree.complete.male$mtCNLowDepth.quant.lq,
target_age = illustr.target_age,
target_somatic_quant = illustr.target_somatic_quant,
seed = 314159,
R = illustr.R)
illustr.grip_mtcn_bootstrap.summary = somatic_effect_bootsummary(illustr.grip_mtcn_bootstrap, mtcn_quantile = illustr.target_somatic_quant, ci_levels = illustr.ci_levels)
ggplot(illustr.grip_mtcn_bootstrap.summary, aes(x = mtcn_quantile, y = statistic, ymin = boot.lcl, ymax = boot.ucl, fill = ordered(ci_level, levels = rev(sort(illustr.ci_levels))))) +
geom_ribbon(alpha = 0.8) +
geom_line(lwd = 1) +
xlab("Mitochondrial depth quantile") +
ylab("Grip strength anomaly (years)") +
ggtitle("Effect on mitochondrial depth on grip strength in men") +
theme_bw() +
scale_fill_brewer() +
guides(fill = guide_legend(title = "CL")) +
geom_rug(data = illustr.metadata.aspree.complete.male, mapping = aes(x = mtCNLowDepth.quant.lq), sides = "b", alpha = 0.2, inherit.aes = FALSE)
# An alterative take, as the bootstrap CIs arguably aren't entirely sensible -- we're
# not proposing a formal test here after all, but rather just a visualisation. In that
# context a HPD interval, which effectively is just a neater way to overplot the bootstrap
# runs, makes more sense.
library(HDInterval)
illustr.hpd_levels = c(0.5, 0.8, 0.9)
illustr.grip_mtcn_bootstrap.summary_hdi = ldply(illustr.hpd_levels, function(credmass) {
hdis = apply(illustr.grip_mtcn_bootstrap$t, 2, hdi, credMass = credmass)
data.frame(
mtcn_quantile = illustr.target_somatic_quant,
credmass = credmass,
hdi.lcl = hdis[1,],
hdi.ucl = hdis[2,],
statistic = illustr.grip_mtcn_bootstrap$t0)
})
ggplot(illustr.grip_mtcn_bootstrap.summary_hdi, aes(x = mtcn_quantile, y = statistic, ymin = hdi.lcl, ymax = hdi.ucl, fill = ordered(credmass, levels = rev(sort(illustr.hpd_levels))))) +
geom_ribbon(alpha = 0.8) +
geom_line(lwd = 1) +
xlab("Mitochondrial depth quantile") +
ylab("Grip strength anomaly (years)") +
ggtitle("Effect on mitochondrial depth on grip strength in men") +
theme_bw() +
scale_fill_brewer() +
guides(fill = guide_legend(title = "Credible mass")) +
geom_rug(data = illustr.metadata.aspree.complete.male, mapping = aes(x = mtCNLowDepth.quant.lq), sides = "b", alpha = 0.2, inherit.aes = FALSE)